Load packages:
library(tidyverse)
library(brms)
library(ggridges)
library(patchwork)
library(ggrepel)
Load experiment data:
E1 <- read_csv('../data/E1_data.csv')
E2 <- read_csv('../data/E2_data.csv')
Load stimulus characteristics:
stims <- read_csv('../data/stimuli.csv')
## Rows: 50 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Adj, Noun, AdjMod, NounMod, ID, FullStimulus, CrossModality, Hierar...
## dbl (7): Cosine, AdjFreq, NounFreq, PairFreq, AdjFreq_log10, NounFreq_log10,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
For reproducibility, report R version, and brms package version:
R.Version()$version.string
## [1] "R version 4.1.1 (2021-08-10)"
packageVersion('brms')
## [1] '2.16.2'
packageVersion('tidyverse')
## [1] '1.3.1'
packageVersion('ggridges')
## [1] '0.5.3'
packageVersion('patchwork')
## [1] '1.1.1'
brms settings for faster computing:
# Use all available cores so the 4 MCMC chains can sample in parallel:
options(mc.cores=parallel::detectCores())
Rename ID column to something shorter:
# Shorten Qualtrics' ResponseId column name to ID:
E1 <- E1 %>% rename(ID = ResponseId)
E2 <- E2 %>% rename(ID = ResponseId)
The data are complete: Qualtrics only moves finished sessions into the completed responses, so there are no partial records to exclude.
Check whether exclusions need to be performed because of the catch trial. The answer was 5:
all(E1$CatchTrial == 5)
## [1] TRUE
all(E2$CatchTrial == 5)
## [1] TRUE
Do any participants need to be excluded based on native speaker status?
E1 %>% count(NativeSpeaker)
## # A tibble: 2 × 2
## NativeSpeaker n
## <chr> <int>
## 1 Yes, I grew up speaking English at home, but I also spoke other languag… 2
## 2 Yes, I grew up speaking only English at home 48
E2 %>% count(NativeSpeaker)
## # A tibble: 3 × 2
## NativeSpeaker n
## <chr> <int>
## 1 No, I did not grow up speaking English at home 2
## 2 Yes, I grew up speaking English at home, but I also spoke other languag… 2
## 3 Yes, I grew up speaking only English at home 46
Exclude:
# Drop the two non-native speakers from Experiment 2:
E2 <- E2 %>%
  filter(NativeSpeaker != "No, I did not grow up speaking English at home")
Make into long format:
E1
## # A tibble: 50 × 63
## StartDate EndDate Progress `Duration (in se… Finished RecordedDate ID
## <chr> <chr> <dbl> <dbl> <lgl> <chr> <chr>
## 1 19/04/2021… 19/04/20… 100 318 TRUE 19/04/2021 … R_D7D…
## 2 19/04/2021… 19/04/20… 100 371 TRUE 19/04/2021 … R_1zX…
## 3 19/04/2021… 19/04/20… 100 299 TRUE 19/04/2021 … R_3P5…
## 4 19/04/2021… 19/04/20… 100 1140 TRUE 19/04/2021 … R_1OU…
## 5 19/04/2021… 19/04/20… 100 788 TRUE 19/04/2021 … R_1Cp…
## 6 19/04/2021… 19/04/20… 100 755 TRUE 19/04/2021 … R_3KE…
## 7 19/04/2021… 19/04/20… 100 295 TRUE 19/04/2021 … R_3KO…
## 8 19/04/2021… 19/04/20… 100 316 TRUE 19/04/2021 … R_BzH…
## 9 19/04/2021… 19/04/20… 100 205 TRUE 19/04/2021 … R_22q…
## 10 19/04/2021… 19/04/20… 100 231 TRUE 19/04/2021 … R_1OD…
## # … with 40 more rows, and 56 more variables: CatchTrial <dbl>, Item_1 <chr>,
## # Item_2 <chr>, Item_3 <chr>, Item_4 <chr>, Item_5 <chr>, Item_6 <chr>,
## # Item_7 <chr>, Item_8 <chr>, Item_9 <chr>, Item_10 <chr>, Item_11 <chr>,
## # Item_12 <chr>, Item_13 <chr>, Item_14 <chr>, Item_15 <chr>, Item_16 <chr>,
## # Item_17 <chr>, Item_18 <chr>, Item_19 <chr>, Item_20 <chr>, Item_21 <chr>,
## # Item_22 <chr>, Item_23 <chr>, Item_24 <chr>, Item_25 <chr>, Item_26 <chr>,
## # Item_27 <chr>, Item_28 <chr>, Item_29 <chr>, Item_30 <chr>, …
Check number of participants:
nrow(E1)
## [1] 50
nrow(E2)
## [1] 48
Check number of male/female participants:
E1 %>% count(Gender)
## # A tibble: 2 × 2
## Gender n
## <chr> <int>
## 1 Female 23
## 2 Male 27
E2 %>% count(Gender)
## # A tibble: 2 × 2
## Gender n
## <chr> <int>
## 1 Female 17
## 2 Male 31
Check age range and average age:
mean(E1$Age)
## [1] 39.48
range(E1$Age)
## [1] 19 67
mean(E2$Age)
## [1] 36.16667
range(E2$Age)
## [1] 22 53
Convert into long format:
# Reshape to one row per participant-item combination:
E1 <- E1 %>%
  pivot_longer(Item_1:Item_50,
               names_to = 'Item',
               values_to = 'Response')
E2 <- E2 %>%
  pivot_longer(Item_1:Item_50,
               names_to = 'Item',
               values_to = 'Response')
Make the response into an ordered factor variable:
# Experiment 1: literalness scale, ordered from 1 = very literal
# to 7 = very metaphoric:
lit_levels <- c('very literal', 'literal', 'moderately literal',
                'neutral',
                'moderately metaphoric', 'metaphoric', 'very metaphoric')
E1 <- mutate(E1,
             Response = factor(Response, levels = lit_levels),
             ResponseNum = as.numeric(Response))
# Experiment 2: creativity scale, ordered from 1 = very uncreative
# to 7 = very creative:
creat_levels <- c('very uncreative', 'uncreative', 'moderately uncreative',
                  'neutral',
                  'moderately creative', 'creative', 'very creative')
E2 <- mutate(E2,
             Response = factor(Response, levels = creat_levels),
             ResponseNum = as.numeric(Response))
Exclude straightliners. 40 times the same response is 80%, which was our pre-registered exclusion criterion. Is anybody above that?
# Experiment 1:
E1 %>% count(ID, Response) %>%
filter(n >= 40)
## # A tibble: 0 × 3
## # … with 3 variables: ID <chr>, Response <fct>, n <int>
# Experiment 2:
E2 %>% count(ID, Response) %>%
filter(n >= 40)
## # A tibble: 0 × 3
## # … with 3 variables: ID <chr>, Response <fct>, n <int>
Merge with stimulus characteristics:
# Attach per-item stimulus characteristics; the Item column matches
# the ID column of the stimulus table:
E1 <- E1 %>% left_join(stims, by = c('Item' = 'ID'))
E2 <- E2 %>% left_join(stims, by = c('Item' = 'ID'))
Descriptive statistics, average cosine per rating. 7 = very metaphoric; 1 = very literal
# Average cosine similarity for each response category (Experiment 1):
E1_means <- E1 %>%
  group_by(Response) %>%
  summarize(M = mean(Cosine))
Descriptive statistics, average cosine per rating for experiment 2. 7 = very creative; 1 = very uncreative
# Average cosine similarity for each response category (Experiment 2):
E2_means <- E2 %>%
  group_by(Response) %>%
  summarize(M = mean(Cosine))
Make barplots of the averages:
# Experiment 1:
E1_barplot <- E1_means %>%
  ggplot(aes(x = factor(Response), y = M)) +
  # geom_col() is the idiomatic equivalent of geom_bar(stat = 'identity'):
  geom_col() +
  ylab('Average cosine') +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank())
# Experiment 2:
E2_barplot <- E2_means %>%
  ggplot(aes(x = factor(Response), y = M)) +
  geom_col() +
  ylab('Average cosine') +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank())
Save both externally:
# Write both barplots to PDF (8 x 6 inches) in the figures folder:
ggsave(plot = E1_barplot, filename = '../figures/E1_barplot.pdf',
width = 8, height = 6)
ggsave(plot = E2_barplot, filename = '../figures/E2_barplot.pdf',
width = 8, height = 6)
Show them in the R script, first, Experiment 1:
E1_barplot
Then, Experiment 2:
E2_barplot
Round both means tables for reporting:
# Round the average cosines to two decimals for reporting:
E1_means$M <- round(E1_means$M, 2)
E2_means$M <- round(E2_means$M, 2)
Save externally:
# Write the rounded means tables to CSV for reporting:
write_csv(E1_means, '../data/E1_means.csv')
write_csv(E2_means, '../data/E2_means.csv')
Make a joy plot of this:
# Experiment 1:
E1_joy <- E1 %>%
  ggplot(aes(x = Cosine, y = factor(Response))) +
  xlab('Cosine similarity') +
  geom_density_ridges(fill = 'steelblue', alpha = 0.8) +
  # One x scale carrying both breaks and limits. The original added xlim()
  # on top of scale_x_continuous(), which silently REPLACED that scale and
  # discarded the custom breaks (hence the "Scale for 'x' is already
  # present" warning):
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_ridges() +
  theme(axis.title.y = element_blank(),
        axis.text.y = element_text(face = 'bold'),
        axis.title.x = element_text(face = 'bold', size = 16))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
# Experiment 2:
E2_joy <- E2 %>%
  ggplot(aes(x = Cosine, y = factor(Response))) +
  xlab('Cosine similarity') +
  geom_density_ridges(fill = 'steelblue', alpha = 0.8) +
  # One x scale carrying both breaks and limits (adding xlim() separately
  # replaces the scale and loses the custom breaks):
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_ridges() +
  theme(axis.title.y = element_blank(),
        axis.text.y = element_text(face = 'bold'),
        axis.title.x = element_text(face = 'bold', size = 16))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
Show these in the script, Experiment 1:
E1_joy
## Picking joint bandwidth of 0.0644
Experiment 2:
E2_joy
## Picking joint bandwidth of 0.08
Save both separately:
ggsave(plot = E1_joy, filename = '../figures/E1_joy_plot.pdf',
width = 8, height = 6)
## Picking joint bandwidth of 0.0644
ggsave(plot = E2_joy, filename = '../figures/E2_joy_plot.pdf',
width = 8, height = 6)
## Picking joint bandwidth of 0.08
Put both together and save them in one:
# Shared bold title styling for the two panel labels:
panel_title <- theme(plot.title = element_text(face = 'bold', size = 18,
                                               margin = margin(b = 20)))
E1_joy <- E1_joy + ggtitle('(a) Experiment 1') + panel_title
E2_joy <- E2_joy + ggtitle('(b) Experiment 2') + panel_title
# Combine side by side with patchwork:
both_joy <- E1_joy + E2_joy
Show in script:
both_joy
## Picking joint bandwidth of 0.0644
## Picking joint bandwidth of 0.08
Save:
ggsave(plot = both_joy, filename = '../figures/both_joy_plots.pdf',
width = 12, height = 6)
## Picking joint bandwidth of 0.0644
## Picking joint bandwidth of 0.08
Check how corpus attestation influenced this:
# Mean and SD of the 1-7 rating, split by corpus attestation:
E1 %>% group_by(PairAttested) %>%
summarize(M = mean(ResponseNum),
SD = sd(ResponseNum))
## # A tibble: 2 × 3
## PairAttested M SD
## <chr> <dbl> <dbl>
## 1 attested 4.04 2.15
## 2 not_attested 5.36 1.78
Recode this factor so that it becomes easier to interpret in the model (make “not attested” the reference level):
# 'not_attested' listed first so that it becomes the reference level:
attest_levels <- c('not_attested', 'attested')
E1$PairAttested <- factor(E1$PairAttested, levels = attest_levels)
E2$PairAttested <- factor(E2$PairAttested, levels = attest_levels)
Set MCMC controls for all models:
# Stricter sampler settings (higher adapt_delta and max_treedepth) to
# avoid divergent transitions, at the cost of slower sampling:
mcmc_controls <- list(adapt_delta = 0.99, max_treedepth = 13)
Prior, a weakly informative prior on the slope coefficient with SD = 2 (pre-registered):
Note Dec 13, 2022: After acceptance of the paper, we
were made aware of Lemoine (2019), and the fact that prior choices have
to also be made with respect to the respective link functions, in this
case the logit. It turns out that SD = 2 is actually biased towards more
extreme values in probability space (0 and 1), as can be verified by
hist(plogis(rnorm(1000, 0, 2))) compared to
hist(plogis(rnorm(1000, 0, 1))). We therefore chose half of
the SD. The results remain qualitatively the same regardless of which SD
value is chosen, and the conclusions are left untouched by this choice
of priors.
# Weakly informative normal(0, 1) prior on all population-level slopes
# (class 'b'); see the Dec 13, 2022 note above for the choice of SD = 1:
weak_prior <- prior('normal(0, 1)', class = 'b')
Analyze the ratings, continuous model:
# Continuous model: ordinal (cumulative logit) regression of the 1-7
# rating on cosine similarity, corpus attestation, and log10 word
# frequencies, with by-participant random slopes for Cosine and crossed
# random intercepts for adjective, noun, and item:
E1_cont <- brm(ResponseNum ~ 1 + Cosine +
PairAttested + AdjFreq_log10 + NounFreq_log10 +
(1 + Cosine|ID) +
(1|Adj) + (1|Noun) + (1|Item),
data = E1,
family = cumulative(),
prior = weak_prior,
# MCMC variables:
control = mcmc_controls,
cores = 4, chains = 4,
init = 0, seed = 42,
# 2000 post-warmup draws per chain, 8000 total:
warmup = 3000, iter = 5000,
# Keep all parameters so loo(moment_match = TRUE) works later:
save_pars = save_pars(all = TRUE))
# Save:
save(E1_cont, file = '../models/E1_cont_mdl.Rdata',
compress = 'xz', compression_level = 9)
Load:
load('../models/E1_cont_mdl.Rdata')
Posterior predictive checks:
pp_check(E1_cont, ndraws = 100)
Analyze the ratings, categorical model:
# Categorical model: same as the continuous model, but with the binary
# CrossModality predictor in place of the continuous cosine similarity:
E1_cat <- brm(ResponseNum ~ 1 + CrossModality +
PairAttested + AdjFreq_log10 + NounFreq_log10 +
(1 + CrossModality|ID) +
(1|Adj) + (1|Noun) + (1|Item),
data = E1,
family = cumulative(),
prior = weak_prior,
# MCMC variables:
control = mcmc_controls,
cores = 4, chains = 4,
init = 0, seed = 42,
# 2000 post-warmup draws per chain, 8000 total:
warmup = 3000, iter = 5000,
# Keep all parameters so loo(moment_match = TRUE) works later:
save_pars = save_pars(all = TRUE))
# Save:
save(E1_cat, file = '../models/E1_cat_mdl.Rdata',
compress = 'xz', compression_level = 9)
Load the models:
load('../models/E1_cat_mdl.Rdata')
Posterior predictive checks:
pp_check(E1_cat, ndraws = 100)
Perform LOO:
# Leave-one-out cross-validation for model comparison; moment_match = TRUE
# refits problematic observations (high Pareto k) for reliable estimates:
E1_cont_loo <- loo(E1_cont, moment_match = TRUE)
E1_cat_loo <- loo(E1_cat, moment_match = TRUE)
# Save:
save(E1_cont_loo, file = '../models/E1_cont_loo.Rdata',
compress = 'xz', compression_level = 9)
save(E1_cat_loo, file = '../models/E1_cat_loo.Rdata',
compress = 'xz', compression_level = 9)
Load and compare:
load('../models/E1_cont_loo.Rdata')
load('../models/E1_cat_loo.Rdata')
# Compare:
E1_both_loo <- loo_compare(E1_cont_loo, E1_cat_loo)
# Show:
E1_both_loo
## elpd_diff se_diff
## E1_cont 0.0 0.0
## E1_cat -30.2 16.0
Check Bayes R2:
bayes_R2(E1_cat)
## Warning: Predictions are treated as continuous variables in 'bayes_R2' which is
## likely invalid for ordinal families.
## Estimate Est.Error Q2.5 Q97.5
## R2 0.6026071 0.006211609 0.589824 0.6143055
bayes_R2(E1_cont)
## Warning: Predictions are treated as continuous variables in 'bayes_R2' which is
## likely invalid for ordinal families.
## Estimate Est.Error Q2.5 Q97.5
## R2 0.6087409 0.006250185 0.5960922 0.6203633
Interpret the model:
summary(E1_cont)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: ResponseNum ~ Cosine + PairAttested + AdjFreq_log10 + NounFreq_log10 + (1 + Cosine | ID) + (1 | Adj) + (1 | Noun) + (1 | Item)
## Data: E1 (Number of observations: 2500)
## Draws: 4 chains, each with iter = 5000; warmup = 3000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~Adj (Number of levels: 21)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.48 0.31 0.03 1.16 1.00 853 2392
##
## ~ID (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 1.91 0.23 1.51 2.42 1.00 1321
## sd(Cosine) 2.82 0.35 2.23 3.59 1.00 1561
## cor(Intercept,Cosine) -0.91 0.03 -0.96 -0.84 1.00 2161
## Tail_ESS
## sd(Intercept) 2684
## sd(Cosine) 2925
## cor(Intercept,Cosine) 3093
##
## ~Item (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.06 0.18 0.72 1.44 1.00 1570 2717
##
## ~Noun (Number of levels: 18)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.50 0.28 0.04 1.11 1.00 1101 3119
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -4.24 1.72 -7.67 -0.89 1.00 3992
## Intercept[2] -2.89 1.72 -6.32 0.49 1.00 3970
## Intercept[3] -1.77 1.72 -5.18 1.59 1.00 3974
## Intercept[4] -1.37 1.72 -4.80 1.97 1.00 3972
## Intercept[5] -0.27 1.72 -3.71 3.08 1.00 3966
## Intercept[6] 1.50 1.72 -1.92 4.86 1.00 3964
## Cosine -3.55 0.66 -4.82 -2.21 1.00 3173
## PairAttestedattested -1.16 0.45 -2.04 -0.28 1.00 2863
## AdjFreq_log10 0.43 0.26 -0.08 0.94 1.00 4148
## NounFreq_log10 0.03 0.35 -0.67 0.72 1.00 4043
## Tail_ESS
## Intercept[1] 4876
## Intercept[2] 5074
## Intercept[3] 5145
## Intercept[4] 5126
## Intercept[5] 5010
## Intercept[6] 4975
## Cosine 4119
## PairAttestedattested 4068
## AdjFreq_log10 4672
## NounFreq_log10 4962
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summary(E1_cat)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: ResponseNum ~ CrossModality + PairAttested + AdjFreq_log10 + NounFreq_log10 + (1 + CrossModality | ID) + (1 | Adj) + (1 | Noun) + (1 | Item)
## Data: E1 (Number of observations: 2500)
## Draws: 4 chains, each with iter = 5000; warmup = 3000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~Adj (Number of levels: 21)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.87 0.32 0.21 1.51 1.01 1151 1302
##
## ~ID (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept) 0.98 0.11 0.78 1.23 1.00
## sd(CrossModalityunimodal) 1.89 0.24 1.47 2.41 1.00
## cor(Intercept,CrossModalityunimodal) -0.68 0.09 -0.83 -0.47 1.00
## Bulk_ESS Tail_ESS
## sd(Intercept) 1862 3696
## sd(CrossModalityunimodal) 2337 4471
## cor(Intercept,CrossModalityunimodal) 2108 3805
##
## ~Item (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.80 0.20 0.45 1.22 1.01 1137 2666
##
## ~Noun (Number of levels: 18)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.57 0.29 0.05 1.20 1.01 1143 1892
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -2.11 1.65 -5.40 1.10 1.00 5093
## Intercept[2] -0.69 1.65 -3.95 2.54 1.00 5115
## Intercept[3] 0.45 1.65 -2.81 3.66 1.00 5092
## Intercept[4] 0.85 1.66 -2.41 4.07 1.00 5099
## Intercept[5] 1.94 1.66 -1.33 5.15 1.00 5082
## Intercept[6] 3.64 1.66 0.37 6.88 1.00 5101
## CrossModalityunimodal -3.22 0.48 -4.13 -2.26 1.00 3051
## PairAttestedattested -1.18 0.39 -1.97 -0.45 1.00 3601
## AdjFreq_log10 0.51 0.28 -0.04 1.09 1.00 3868
## NounFreq_log10 0.23 0.34 -0.47 0.86 1.00 4532
## Tail_ESS
## Intercept[1] 5540
## Intercept[2] 5692
## Intercept[3] 5627
## Intercept[4] 5703
## Intercept[5] 5656
## Intercept[6] 5736
## CrossModalityunimodal 4046
## PairAttestedattested 4888
## AdjFreq_log10 4940
## NounFreq_log10 5087
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Check all effects:
# Extract the fixed-effect draws; as_draws_df() is the supported
# replacement for the deprecated posterior_samples():
E1_posts <- as_draws_df(E1_cont) %>%
  as_tibble() %>%
  select(b_Cosine:b_NounFreq_log10)
## Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
## recommended alternatives.
# Pull out the posterior draws of each slope as plain vectors:
b_cosine <- E1_posts[['b_Cosine']]
b_attested <- E1_posts[['b_PairAttestedattested']]
b_adj <- E1_posts[['b_AdjFreq_log10']]
b_noun <- E1_posts[['b_NounFreq_log10']]
Check:
sum(b_cosine > 0) / length(b_cosine)
## [1] 0
sum(b_attested < 0) / length(b_attested)
## [1] 0.99525
sum(b_adj < 0) / length(b_adj)
## [1] 0.045
sum(b_noun < 0) / length(b_noun)
## [1] 0.460875
Analyze the ratings, continuous model:
# Continuous model for Experiment 2 (creativity ratings); identical
# specification to the Experiment 1 continuous model:
E2_cont <- brm(ResponseNum ~ 1 + Cosine +
PairAttested + AdjFreq_log10 + NounFreq_log10 +
(1 + Cosine|ID) +
(1|Adj) + (1|Noun) + (1|Item),
data = E2,
family = cumulative(),
prior = weak_prior,
# MCMC variables:
control = mcmc_controls,
cores = 4, chains = 4,
init = 0, seed = 42,
# 2000 post-warmup draws per chain, 8000 total:
warmup = 3000, iter = 5000,
# Keep all parameters so loo(moment_match = TRUE) works later:
save_pars = save_pars(all = TRUE))
# Save:
save(E2_cont, file = '../models/E2_cont_mdl.Rdata',
compress = 'xz', compression_level = 9)
Load:
load('../models/E2_cont_mdl.Rdata')
Posterior predictive checks:
pp_check(E2_cont, ndraws = 100)
Analyze the ratings, categorical model:
# Categorical model for Experiment 2; identical specification to the
# Experiment 1 categorical model:
E2_cat <- brm(ResponseNum ~ 1 + CrossModality +
PairAttested + AdjFreq_log10 + NounFreq_log10 +
(1 + CrossModality|ID) +
(1|Adj) + (1|Noun) + (1|Item),
data = E2,
family = cumulative(),
prior = weak_prior,
# MCMC variables:
control = mcmc_controls,
cores = 4, chains = 4,
init = 0, seed = 42,
# 2000 post-warmup draws per chain, 8000 total:
warmup = 3000, iter = 5000,
# Keep all parameters so loo(moment_match = TRUE) works later:
save_pars = save_pars(all = TRUE))
# Save:
save(E2_cat, file = '../models/E2_cat_mdl.Rdata',
compress = 'xz', compression_level = 9)
Load:
load('../models/E2_cat_mdl.Rdata')
Posterior predictive checks:
pp_check(E2_cat, ndraws = 100)
Perform LOO:
# Leave-one-out cross-validation for model comparison; moment_match = TRUE
# refits problematic observations (high Pareto k) for reliable estimates:
E2_cont_loo <- loo(E2_cont, moment_match = TRUE)
E2_cat_loo <- loo(E2_cat, moment_match = TRUE)
# Save:
save(E2_cont_loo, file = '../models/E2_cont_loo.Rdata',
compress = 'xz', compression_level = 9)
save(E2_cat_loo, file = '../models/E2_cat_loo.Rdata',
compress = 'xz', compression_level = 9)
Load and compare:
load('../models/E2_cont_loo.Rdata')
load('../models/E2_cat_loo.Rdata')
# compare:
E2_both_loo <- loo_compare(E2_cont_loo, E2_cat_loo)
# show:
E2_both_loo
## elpd_diff se_diff
## E2_cont 0.0 0.0
## E2_cat -18.1 14.2
Check Bayes R2:
bayes_R2(E2_cat)
## Warning: Predictions are treated as continuous variables in 'bayes_R2' which is
## likely invalid for ordinal families.
## Estimate Est.Error Q2.5 Q97.5
## R2 0.3726984 0.0104388 0.3518433 0.3925688
bayes_R2(E2_cont)
## Warning: Predictions are treated as continuous variables in 'bayes_R2' which is
## likely invalid for ordinal families.
## Estimate Est.Error Q2.5 Q97.5
## R2 0.3835328 0.0104528 0.3627906 0.4038017
Interpret the model:
summary(E2_cont)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: ResponseNum ~ Cosine + PairAttested + AdjFreq_log10 + NounFreq_log10 + (1 + Cosine | ID) + (1 | Adj) + (1 | Noun) + (1 | Item)
## Data: E2 (Number of observations: 2400)
## Draws: 4 chains, each with iter = 5000; warmup = 3000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~Adj (Number of levels: 21)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.51 0.21 0.09 0.92 1.00 874 949
##
## ~ID (Number of levels: 48)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 1.42 0.17 1.12 1.80 1.00 1844
## sd(Cosine) 2.21 0.27 1.73 2.81 1.00 1568
## cor(Intercept,Cosine) -0.76 0.07 -0.87 -0.59 1.00 1650
## Tail_ESS
## sd(Intercept) 3610
## sd(Cosine) 2926
## cor(Intercept,Cosine) 2963
##
## ~Item (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.38 0.14 0.12 0.66 1.00 744 1184
##
## ~Noun (Number of levels: 18)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.40 0.16 0.08 0.75 1.00 1386 1460
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -5.74 1.13 -7.99 -3.51 1.00 2590
## Intercept[2] -4.38 1.13 -6.63 -2.16 1.00 2572
## Intercept[3] -3.48 1.13 -5.72 -1.26 1.00 2560
## Intercept[4] -2.92 1.13 -5.15 -0.69 1.00 2553
## Intercept[5] -1.47 1.13 -3.71 0.76 1.00 2544
## Intercept[6] 0.21 1.13 -2.02 2.45 1.00 2586
## Cosine -1.62 0.45 -2.47 -0.72 1.00 2733
## PairAttestedattested -0.51 0.30 -1.13 0.04 1.00 1457
## AdjFreq_log10 -0.03 0.17 -0.39 0.30 1.00 3147
## NounFreq_log10 -0.41 0.22 -0.86 0.02 1.00 2991
## Tail_ESS
## Intercept[1] 4287
## Intercept[2] 4067
## Intercept[3] 4360
## Intercept[4] 4455
## Intercept[5] 4385
## Intercept[6] 4267
## Cosine 3937
## PairAttestedattested 3578
## AdjFreq_log10 4026
## NounFreq_log10 3356
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summary(E2_cat)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: ResponseNum ~ CrossModality + PairAttested + AdjFreq_log10 + NounFreq_log10 + (1 + CrossModality | ID) + (1 | Adj) + (1 | Noun) + (1 | Item)
## Data: E2 (Number of observations: 2400)
## Draws: 4 chains, each with iter = 5000; warmup = 3000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~Adj (Number of levels: 21)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.63 0.16 0.34 0.99 1.00 1368 1031
##
## ~ID (Number of levels: 48)
## Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept) 0.95 0.11 0.75 1.20 1.00
## sd(CrossModalityunimodal) 1.56 0.21 1.20 2.01 1.00
## cor(Intercept,CrossModalityunimodal) -0.30 0.15 -0.57 0.02 1.00
## Bulk_ESS Tail_ESS
## sd(Intercept) 1732 3776
## sd(CrossModalityunimodal) 2385 4480
## cor(Intercept,CrossModalityunimodal) 2373 3858
##
## ~Item (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.18 0.12 0.01 0.46 1.00 960 918
##
## ~Noun (Number of levels: 18)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.52 0.15 0.28 0.86 1.00 1898 2576
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -4.63 1.13 -6.89 -2.40 1.00 3910
## Intercept[2] -3.25 1.13 -5.50 -1.02 1.00 3906
## Intercept[3] -2.35 1.13 -4.61 -0.13 1.00 3898
## Intercept[4] -1.80 1.13 -4.04 0.42 1.00 3898
## Intercept[5] -0.37 1.13 -2.62 1.84 1.00 3894
## Intercept[6] 1.27 1.13 -0.98 3.50 1.00 3909
## CrossModalityunimodal -1.34 0.30 -1.94 -0.74 1.00 2544
## PairAttestedattested -0.61 0.20 -1.02 -0.23 1.00 2734
## AdjFreq_log10 0.06 0.18 -0.30 0.43 1.00 3042
## NounFreq_log10 -0.32 0.23 -0.79 0.13 1.00 4178
## Tail_ESS
## Intercept[1] 5020
## Intercept[2] 4964
## Intercept[3] 4932
## Intercept[4] 4971
## Intercept[5] 4972
## Intercept[6] 4940
## CrossModalityunimodal 3694
## PairAttestedattested 2400
## AdjFreq_log10 4505
## NounFreq_log10 4331
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Check all effects:
# Extract the fixed-effect draws; as_draws_df() is the supported
# replacement for the deprecated posterior_samples():
E2_posts <- as_draws_df(E2_cont) %>%
  as_tibble() %>%
  select(b_Cosine:b_NounFreq_log10)
## Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
## recommended alternatives.
# Pull out the posterior draws of each slope as plain vectors:
b_cosine <- E2_posts[['b_Cosine']]
b_attested <- E2_posts[['b_PairAttestedattested']]
b_adj <- E2_posts[['b_AdjFreq_log10']]
b_noun <- E2_posts[['b_NounFreq_log10']]
Check:
sum(b_cosine > 0) / length(b_cosine)
## [1] 0.00025
sum(b_attested > 0) / length(b_attested)
## [1] 0.0355
sum(b_adj > 0) / length(b_adj)
## [1] 0.45425
sum(b_noun > 0) / length(b_noun)
## [1] 0.0325
Make a coefficient plot. First make both tables of posterior estimates into long format:
# Experiment 1:
E1_posts_long <- E1_posts %>%
  pivot_longer(cols = b_Cosine:b_NounFreq_log10,
               names_to = 'Coefficient',
               values_to = 'Estimate')
# Experiment 2:
E2_posts_long <- E2_posts %>%
  pivot_longer(cols = b_Cosine:b_NounFreq_log10,
               names_to = 'Coefficient',
               values_to = 'Estimate')
Get 95% CIs:
# Posterior mean and 95% credible interval per coefficient:
# Experiment 1:
E1_coefs <- E1_posts_long %>%
  group_by(Coefficient) %>%
  summarize(M = mean(Estimate),
            LowerCI = quantile(Estimate, 0.025),
            UpperCI = quantile(Estimate, 0.975))
# Experiment 2:
E2_coefs <- E2_posts_long %>%
  group_by(Coefficient) %>%
  summarize(M = mean(Estimate),
            LowerCI = quantile(Estimate, 0.025),
            UpperCI = quantile(Estimate, 0.975))
Rename the coefficients so that they are prettier for plotting:
# Lookup table from raw model coefficient names to display labels,
# replacing the repeated ifelse() chains:
coef_labels <- c(b_Cosine = 'Cosine similarity',
                 b_PairAttestedattested = 'Corpus attestation',
                 b_AdjFreq_log10 = 'Adjective frequency',
                 b_NounFreq_log10 = 'Noun frequency')
# Experiment 1 (coalesce() keeps any name missing from the table as is,
# matching the original ifelse() fall-through behavior):
E1_coefs <- mutate(E1_coefs,
                   Coefficient = coalesce(unname(coef_labels[Coefficient]),
                                          Coefficient))
# Experiment 2:
E2_coefs <- mutate(E2_coefs,
                   Coefficient = coalesce(unname(coef_labels[Coefficient]),
                                          Coefficient))
Plot this, Experiment 1:
# Coefficient plot for Experiment 1 (posterior means with 95% CIs,
# coefficients ordered by magnitude), built in a single pipeline:
E1_coef_p <- E1_coefs %>%
  ggplot(aes(x = M, y = reorder(Coefficient, M),
             xmin = LowerCI, xmax = UpperCI)) +
  geom_point(pch = 15, size = 3) +
  geom_errorbar(width = 0.2) +
  geom_vline(xintercept = 0, linetype = 2) +
  coord_cartesian(xlim = c(-5, 1)) +
  scale_x_continuous(breaks = seq(-5, 1, 1)) +
  xlab('Model coefficient') +
  ylab(NULL) +
  theme_classic() +
  theme(axis.title.y = element_blank(),
        axis.title.x = element_text(face = 'bold', size = 16,
                                    margin = margin(t = 10)),
        axis.text.y = element_text(face = 'bold', size = 14),
        axis.text.x = element_text(size = 12))
# Show and save:
E1_coef_p
ggsave(plot = E1_coef_p, filename = '../figures/E1_coefficient_plot.pdf',
       width = 8, height = 4)
Plot this, Experiment 2:
# Coefficient plot for Experiment 2, same layout as Experiment 1 but with
# a narrower x range:
E2_coef_p <- E2_coefs %>%
  ggplot(aes(x = M, y = reorder(Coefficient, M),
             xmin = LowerCI, xmax = UpperCI)) +
  geom_point(pch = 15, size = 3) +
  geom_errorbar(width = 0.2) +
  geom_vline(xintercept = 0, linetype = 2) +
  coord_cartesian(xlim = c(-3, 1)) +
  scale_x_continuous(breaks = seq(-3, 1, 1)) +
  xlab('Model coefficient') +
  ylab(NULL) +
  theme_classic() +
  theme(axis.title.y = element_blank(),
        axis.title.x = element_text(face = 'bold', size = 16,
                                    margin = margin(t = 10)),
        axis.text.y = element_text(face = 'bold', size = 14),
        axis.text.x = element_text(size = 12))
# Show and save:
E2_coef_p
ggsave(plot = E2_coef_p, filename = '../figures/E2_coefficient_plot.pdf',
       width = 8, height = 4)
Put them both into the same plot:
# Shared bold title styling for the two panel labels:
coef_title <- theme(plot.title = element_text(face = 'bold', size = 18,
                                              margin = margin(b = 20)))
E1_coef_p <- E1_coef_p + ggtitle('(a) Experiment 1') + coef_title
E2_coef_p <- E2_coef_p + ggtitle('(b) Experiment 2') + coef_title
# Stack vertically with a spacer row between the two panels, then save:
both_coef_p <- E1_coef_p + plot_spacer() + E2_coef_p +
  plot_layout(nrow = 3, heights = c(5, 1, 5))
ggsave(plot = both_coef_p, filename = '../figures/both_coefficient_plots.pdf',
       width = 12, height = 12)
Get item-based averages for E1 and E2:
# Per-item mean metaphoricity (E1) and creativity (E2) ratings:
E1_items <- summarize(group_by(E1, Item),
                      Metaphoricity = mean(ResponseNum))
E2_items <- summarize(group_by(E2, Item),
                      Creativity = mean(ResponseNum))
Merge:
# Join explicitly on the item identifier: this silences the
# "Joining, by" message and guards against accidental joins on any
# other shared column:
both <- left_join(E1_items, E2_items, by = 'Item')
## Joining, by = "Item"
Add stimulus characteristics for plotting:
# Attach the adjective and noun for each item; the stimulus table's ID
# column matches the Item column:
both <- left_join(both,
select(stims, ID, Adj, Noun), by = c('Item' = 'ID'))
Merge the adjective and noun pair into string for plotting:
# Combine adjective and noun into a single "adj noun" label:
both <- mutate(both,
               Pair = str_c(Adj, Noun, sep = ' '))
Z-scored variables for Bayesian correlation:
# scale() centers and then divides by the standard deviation, which is
# exactly the two-step z-scoring done here before (the sd is unchanged
# by centering):
both$Met_z <- as.numeric(scale(both$Metaphoricity))
both$Creat_z <- as.numeric(scale(both$Creativity))
Fit the model (no hand-specified priors here):
# With both variables z-scored and the intercept suppressed, the Met_z
# slope estimates the correlation between the two item-level ratings;
# brms default priors and sampler settings are used here, as noted above:
cor_mdl <- brm(Creat_z ~ -1 + Met_z,
data = both)
# Save model:
save(cor_mdl, file = '../models/correlation_mdl.Rdata',
compress = 'xz', compression_level = 9)
Load and summarize the models:
# Load:
load('../models/correlation_mdl.Rdata')
# Summarize:
summary(cor_mdl)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Creat_z ~ -1 + Met_z
## Data: both (Number of observations: 50)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Met_z 0.84 0.08 0.68 1.00 1.00 2807 1959
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.55 0.06 0.45 0.67 1.00 2592 2375
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Get the coefficient:
# First (and only) population-level effect: the standardized slope with
# its estimation error and 95% CI. NOTE(review): 'int' is not used again
# in this script — presumably reported in the text; confirm:
int <- fixef(cor_mdl)[1, ]
Show the correlation:
set.seed(666) # fix the RNG (e.g. for ggrepel's label placement)
# Scatterplot of item-level metaphoricity (E1) against creativity (E2),
# with each point labelled by its adjective-noun pair:
both_p <- both %>%
  ggplot(aes(x = Metaphoricity, y = Creativity)) +
  geom_point(alpha = 0.8, size = 3) +
  geom_text_repel(aes(label = Pair),
                  max.overlaps = Inf,
                  box.padding = 0.5,
                  min.segment.length = 0) +
  theme_classic() +
  theme(axis.title.y = element_text(face = 'bold', size = 16,
                                    margin = margin(r = 10)),
        axis.title.x = element_text(face = 'bold', size = 16,
                                    margin = margin(t = 10)),
        axis.text.y = element_text(face = 'bold', size = 14),
        axis.text.x = element_text(face = 'bold', size = 14))
# Show and save:
both_p
ggsave(plot = both_p, filename = '../figures/correlation.pdf',
       width = 8, height = 6)
This completes this analysis.